/*******************************************************************************
*Project :          COVID-19 Research Project 
*Purpose : 		    Clean USDA county-level demographic data.
*Source : 			https://www.ers.usda.gov/data-products/county-level-data-sets/download-data/
*******************************************************************************/

* Start from an empty session: drop any data and other objects held in memory.
clear all 
* Disable --more-- paging so the script never pauses waiting for a keypress.
set more off 
* Close an open log, if any; capture suppresses the error when none is open.
capture log close 


*===============================================================================
* Clean USDA ERS county-level data (population, education, poverty, unemployment)
*===============================================================================

* Read in county-level data, merge together, and save

* Population estimates: read the raw CSV and park it in a temporary file so it
* can be merged onto the master data later in this script.
tempfile pop
import delimited "data/raw/usda_ers/PopulationEstimates.csv"
save "`pop'"

* Education: read the raw CSV, give the merge key and the four columns imported
* as v44-v47 descriptive names, then park the result in a temporary file.
tempfile educ
import delimited "data/raw/usda_ers/Education.csv", clear
rename (fipscode v44 v45 v46 v47) ///
	(fips ///
	 pct_lt_highschool_2014_2018 ///
	 pct_highschool_2014_2018 ///
	 pct_somecollege_2014_2018 ///
	 pct_college_plus_2014_2018)
save "`educ'"

* Poverty estimates: read the raw CSV, rename its county identifier to the
* common merge key (fips), and park the result in a temporary file.
tempfile povert
import delimited "data/raw/usda_ers/PovertyEstimates.csv", clear
rename (fipstxt) (fips)
save "`povert'"

* The unemployment file is loaded last and stays in memory as the master data.
import delimited "data/raw/usda_ers/Unemployment.csv", clear

* Attach each saved tempfile one-to-one on fips. nogenerate suppresses the
* _merge indicator, so consecutive merges do not collide on that variable.
foreach piece in pop educ povert {
	merge 1:1 fips using "``piece''", nogenerate
}
describe

* Variables retained for the cleaned file, grouped by theme.
local keep_ids        fips metro_2013 ruralurban_continuum_code_2013
local keep_population pop_estimate_2018 births_2018 deaths_2018 gq_estimates_2018
local keep_labor      civilian_labor_force_2018 employed_2018 unemployed_2018 ///
	unemployment_rate_2018 median_household_income_2018
local keep_education  pct_lt_highschool_2014_2018 pct_highschool_2014_2018 ///
	pct_somecollege_2014_2018 pct_college_plus_2014_2018
local keep_poverty    pctpovall_2018

keep `keep_ids' `keep_population' `keep_labor' `keep_education' `keep_poverty'

* Keep valid counties only: rows with no metro_2013 value (the state-level and
* U.S. aggregate rows) are dropped.
drop if missing(metro_2013)

  
*Make variables numeric

* destring accepts a varlist, so a single call covers every variable. Commas
* and dollar signs are stripped before conversion; variables that are already
* numeric are left as they are.
destring _all, replace ignore(",$")

* Without the force option, destring does not stop when a column still holds
* other non-numeric text: it prints a note and leaves the column as a string.
* List any such columns explicitly so they do not pass unnoticed into the
* cleaned file. This only reports; it does not change the data or stop the run.
quietly ds, has(type string)
local still_string `r(varlist)'
if "`still_string'" != "" {
	display as error "Warning: not converted to numeric (non-numeric characters remain): `still_string'"
}


* Express percentages on a 0-1 scale: divide the unemployment rate and every
* variable whose name starts with pct by 100.

unab share_vars : unemployment_rate_2018 pct*
foreach v of local share_vars {
	replace `v' = `v' / 100
}


* Rename the county identifier and display it zero-padded to five digits.
rename (fips) (county_fips)
format %05.0f county_fips

* Shrink storage types where possible, then write the cleaned file,
* overwriting any existing copy.
compress
save "data/clean/usda_ers/county_information.dta", replace
